from vllm import LLM, SamplingParams

from llm_vllm_data import industry_infos

import sys

sys.path.append("..")
from utils import save_obj


# Decoding configuration for the batch-inference call below.
# NOTE: the unused `prompts = []` placeholder was removed — prompt strings
# are built into `data` further down, so the name was dead code.
sampling_params = SamplingParams(
    temperature=0.7,       # moderate randomness
    top_p=0.95,            # nucleus sampling cutoff
    max_tokens=128,        # cap on generated tokens per prompt
    stop="<|im_end|>",     # ChatML end-of-turn marker used by Qwen chat models
)

# Local ModelScope cache path of the Qwen1.5-14B-Chat checkpoint.
model_path = "/home/jie/.cache/modelscope/hub/qwen/Qwen1___5-14B-Chat"


# Engine configuration collected in one place, then unpacked into the
# vLLM constructor.  Tokenizer is loaded from the same checkpoint dir.
_engine_kwargs = {
    "model": model_path,
    "trust_remote_code": True,   # Qwen repos ship custom code
    "tokenizer": model_path,
    "tokenizer_mode": "auto",
    "max_model_len": 16016,      # context window cap for this run
    "enforce_eager": True,       # skip CUDA-graph capture
}
llm = LLM(**_engine_kwargs)

# ChatML prompt template for Qwen chat models: system preamble, the user
# message (one entry of `industry_infos`), then an open assistant turn.
template = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{info}<|im_end|>\n<|im_start|>assistant\n"

# Build one fully-formatted prompt per industry record.
# (Comprehension replaces the original for/append loop — same result.)
data = [template.format(info=item) for item in industry_infos]

# Batch-generate completions and persist the raw RequestOutput objects
# for downstream post-processing.
outputs = llm.generate(data, sampling_params)
save_obj(outputs, "vllm_infer_res.pkl")

# nohup python vllm_infer.py > infer_res.log 2>&1 &
